package com.my.app.tools.core.service.other;

import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.my.app.tools.core.service.AbstractAppConfigService;
import com.my.app.tools.core.service.common.AppConfigService;
import com.my.app.tools.pojo.vo.EnterpriseInfoVO;
import com.my.app.tools.util.HttpGet;
import org.apache.commons.collections.CollectionUtils;
import org.apache.http.util.Asserts;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.util.List;

/**
 * Scrapes enterprise information from an external credit-lookup web page.
 *
 * <p>Every scraping parameter (search URL template, page length, jsoup selector and the
 * layout of the matched elements) is read from application configuration by
 * {@link #reloadAppConfig()}.
 *
 * @author guochaohui
 * @date 2020-04-07 18:26
 */
public class SpiderEnterpriseInfoService extends AbstractAppConfigService {

    /** Number of text values consumed by the {@link EnterpriseInfoVO} constructor. */
    private static final int VO_FIELD_COUNT = 3;

    /** {@link String#format} template; receives the encoded keyword and a page number. */
    private String creditShanDongPageUrl;
    /** Number of results the remote site is expected to return per page. */
    private int creditShanDongPageLength;
    /** jsoup selector used to pick the result cells out of a page. */
    private String creditShanDongJsoupTag;
    /** Number of selected cells that make up one result row (the row stride). */
    private int creditShanDongJsoupResultSize;
    /** Offsets, relative to the start of a row, of the cells passed to the VO constructor. */
    private List<Integer> creditShanDongJsoupResultIndex;

    /**
     * Reloads the scraping parameters from application configuration.
     *
     * <p>All values are read and parsed into locals first and only then published to the
     * fields, so a malformed value leaves the previously loaded configuration untouched
     * instead of a half-updated one.
     *
     * @throws NumberFormatException if a numeric configuration value cannot be parsed
     */
    @Override
    public void reloadAppConfig() {
        String pageUrl = AppConfigService.readStringConfig("/app-config/spider-enterprise/credit-shandong/search-url");
        int pageLength = Integer.parseInt(
                AppConfigService.readStringConfig("/app-config/spider-enterprise/credit-shandong/page-length"));
        String jsoupTag = AppConfigService.readStringConfig("/app-config/spider-enterprise/credit-shandong/jsoup-tag");
        int resultSize = Integer.parseInt(
                AppConfigService.readStringConfig("/app-config/spider-enterprise/credit-shandong/jsoup-result-size"));
        String rawIndexes = AppConfigService.readStringConfig("/app-config/spider-enterprise/credit-shandong/jsoup-result-index");

        // omitEmptyStrings tolerates a trailing or doubled comma in the configured list.
        List<Integer> resultIndexes = Lists.newArrayList();
        for (String index : Splitter.on(",").trimResults().omitEmptyStrings().splitToList(rawIndexes)) {
            resultIndexes.add(Integer.valueOf(index));
        }

        this.creditShanDongPageUrl = pageUrl;
        this.creditShanDongPageLength = pageLength;
        this.creditShanDongJsoupTag = jsoupTag;
        this.creditShanDongJsoupResultSize = resultSize;
        this.creditShanDongJsoupResultIndex = resultIndexes;
    }

    /**
     * Queries Shandong enterprise information by keyword.
     *
     * <p>Fetches as many result pages as are needed to cover {@code maxCount} entries
     * (rounding the page count up), parses each page with the configured selector and
     * builds one {@link EnterpriseInfoVO} per complete result row.
     *
     * @param keyword  search keyword; it is URL-encoded before being placed in the request
     * @param maxCount maximum number of entries to return; values {@code <= 0} yield an empty list
     * @return the entries found, at most {@code maxCount} of them; never {@code null}
     * @throws IllegalStateException if the loaded configuration is unusable or a page
     *                               could not be fetched
     * @author guochaohui
     * @date 2020-04-07 18:26
     */
    public List<EnterpriseInfoVO> queryCreditWithShanDong(String keyword, int maxCount) {
        List<EnterpriseInfoVO> list = Lists.newArrayList();
        if (maxCount <= 0) {
            return list;
        }
        // Fail fast on broken configuration: a non-positive page length would divide by
        // zero, and a non-positive row stride would make the row loop spin forever.
        Asserts.check(creditShanDongPageLength > 0, "credit-shandong page-length must be positive");
        Asserts.check(creditShanDongJsoupResultSize > 0, "credit-shandong jsoup-result-size must be positive");
        Asserts.check(creditShanDongJsoupResultIndex != null
                        && creditShanDongJsoupResultIndex.size() >= VO_FIELD_COUNT,
                "credit-shandong jsoup-result-index must list at least " + VO_FIELD_COUNT + " indexes");

        // Encode the keyword so non-ASCII (e.g. Chinese) characters are safe in the URL.
        String encodedKeyword = HttpGet.encode(keyword);

        // Round up so that a maxCount smaller than one page still fetches one page.
        int maxPage = maxCount / creditShanDongPageLength
                + (maxCount % creditShanDongPageLength == 0 ? 0 : 1);

        for (int page = 0; page < maxPage && list.size() < maxCount; page++) {
            // NOTE(review): assumes the URL template takes a 1-based page number. The
            // previous code sent maxCount / pageLength on every iteration (i.e. 1 for a
            // single-page request) and so re-fetched the same page — confirm numbering
            // against the target site.
            String url = String.format(creditShanDongPageUrl, encodedKeyword, page + 1);
            String pageHtml = HttpGet.get(url, null);
            // Asserts.notBlank(value, name) would append " is blank" to the message, so
            // use check() to surface the user-facing message unchanged.
            Asserts.check(!isBlank(pageHtml), "抓取数据失败，请稍候再试");

            Document doc = Jsoup.parse(pageHtml);
            Elements tdList = doc.select(creditShanDongJsoupTag);
            if (CollectionUtils.isEmpty(tdList)) {
                continue;
            }
            for (int i = 0; i < tdList.size() && list.size() < maxCount; i += creditShanDongJsoupResultSize) {
                if (!rowFits(tdList, i)) {
                    // Trailing partial row (cell count not a multiple of the stride).
                    break;
                }
                List<String> results = Lists.newArrayList();
                for (Integer index : creditShanDongJsoupResultIndex) {
                    results.add(tdList.get(i + index).text());
                }
                list.add(new EnterpriseInfoVO(results.get(0), results.get(1), results.get(2)));
            }
        }
        return list;
    }

    /** Returns whether every configured cell offset of the row starting at {@code rowStart} exists. */
    private boolean rowFits(Elements cells, int rowStart) {
        for (Integer index : creditShanDongJsoupResultIndex) {
            int position = rowStart + index;
            if (position < 0 || position >= cells.size()) {
                return false;
            }
        }
        return true;
    }

    /** Returns whether {@code text} is {@code null}, empty or whitespace only. */
    private static boolean isBlank(String text) {
        if (text == null) {
            return true;
        }
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

}
